{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Improving Triangle counting in LightGraphs\n",
    "We want to make triangle counting faster by improving the algorithm.\n",
    "BenchmarkTools is used to make sure that we are measuring times with sufficient accuracy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "using LightGraphs\n",
    "using BenchmarkTools"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Original Implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "local_clustering (generic function with 3 methods)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "function local_clustering(g::AbstractGraph, v::Integer)\n",
    "    k = degree(g, v)\n",
    "    k <= 1 && return (0, 0)\n",
    "    neighs = neighbors(g, v)\n",
    "    c = 0\n",
    "    for i in neighs, j in neighs\n",
    "        i == j && continue\n",
    "        if has_edge(g, i, j)\n",
    "            c += 1\n",
    "        end\n",
    "    end\n",
    "    return is_directed(g) ? (c , k*(k-1)) : (div(c,2) , div(k*(k-1),2))\n",
    "end\n",
    "function local_clustering(g::AbstractGraph, vs = vertices(g))\n",
    "    ntriang = zeros(Int, length(vs))\n",
    "    nalltriang = zeros(Int, length(vs))\n",
    "    i = 0\n",
    "    for (i, v) in enumerate(vs)\n",
    "        ntriang[i], nalltriang[i] = local_clustering(g, v)\n",
    "    end\n",
    "    return ntriang, nalltriang\n",
    "end\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using Memory to Save Time\n",
    "The problem with the original implementation is that $degree(g, v)$ calls to $has_edge$ which is a $log(deg)$ operation.\n",
    "\n",
    "If we use a two pass algorithm to precompute a constant time lookup table.\n",
    "The downside is that this lookup table requires $nv(g)$ storage."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "local_clustering! (generic function with 6 methods)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "function local_clustering!(storage::AbstractVector{Int}, g::AbstractGraph, v::Integer)\n",
    "    k = degree(g, v)\n",
    "    k <= 1 && return (0, 0)\n",
    "    neighs = neighbors(g, v)\n",
    "    tcount = 0\n",
    "    for i in neighs\n",
    "        storage[i] = v\n",
    "    end\n",
    "    \n",
    "    for i in neighs\n",
    "        for j in neighbors(g, i)\n",
    "            i == j && continue\n",
    "            if storage[j] == v\n",
    "                tcount += 1\n",
    "            end\n",
    "        end\n",
    "    end\n",
    "    \n",
    "    return is_directed(g) ? (tcount , k*(k-1)) : (div(tcount,2) , div(k*(k-1),2))\n",
    "end\n",
    "\n",
    "function local_clustering!(storage::AbstractVector, g::AbstractGraph, vs = vertices(g))\n",
    "    ntriang = zeros(Int, length(vs))\n",
    "    nalltriang = zeros(Int, length(vs))\n",
    "    i = 0\n",
    "    for (i, v) in enumerate(vs)\n",
    "        ntriang[i], nalltriang[i] = local_clustering!(storage, g, v)\n",
    "    end\n",
    "    return ntriang, nalltriang\n",
    "end"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Can we save Memory by using bools?\n",
    "\n",
    "We can cut the memory footprint by a factor of 8 by using a Bool Array.\n",
    "But then we have to reset it after every vertex."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "local_clustering! (generic function with 6 methods)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "function local_clustering!(storage::AbstractVector{Bool}, g::AbstractGraph, v::Integer)\n",
    "    k = degree(g, v)\n",
    "    k <= 1 && return (0, 0)\n",
    "    neighs = neighbors(g, v)\n",
    "    tcount = 0\n",
    "    for i in neighs\n",
    "        storage[i] = true\n",
    "    end\n",
    "    \n",
    "    for i in neighs\n",
    "        for j in neighbors(g, i)\n",
    "            i == j && continue\n",
    "            if storage[j]\n",
    "                tcount += 1\n",
    "            end\n",
    "        end\n",
    "    end\n",
    "    \n",
    "    return is_directed(g) ? (tcount , k*(k-1)) : (div(tcount,2) , div(k*(k-1),2))\n",
    "end\n",
    "function local_clustering!(storage::AbstractVector{Bool}, g::AbstractGraph, vs = vertices(g))\n",
    "    ntriang = zeros(Int, length(vs))\n",
    "    nalltriang = zeros(Int, length(vs))\n",
    "    i = 0\n",
    "    for (i, v) in enumerate(vs)\n",
    "        ntriang[i], nalltriang[i] = local_clustering!(storage, g, v)\n",
    "        for w in neighbors(g, v)\n",
    "            storage[w] = false\n",
    "        end\n",
    "    end\n",
    "    return ntriang, nalltriang\n",
    "end"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Shootout\n",
    "Who is faster?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{10000, 49975} undirected simple Int64 graph"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n,k = 10000,5\n",
    "g = barabasi_albert(n,k)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "BenchmarkTools.Trial: \n",
       "  memory estimate:  156.44 KiB\n",
       "  allocs estimate:  5\n",
       "  --------------\n",
       "  minimum time:     79.872 ms (0.00% GC)\n",
       "  median time:      87.567 ms (0.00% GC)\n",
       "  mean time:        97.505 ms (0.00% GC)\n",
       "  maximum time:     306.803 ms (0.00% GC)\n",
       "  --------------\n",
       "  samples:          52\n",
       "  evals/sample:     1\n",
       "  time tolerance:   5.00%\n",
       "  memory tolerance: 1.00%"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "@benchmark local_clustering(g)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "BenchmarkTools.Trial: \n",
       "  memory estimate:  156.44 KiB\n",
       "  allocs estimate:  5\n",
       "  --------------\n",
       "  minimum time:     8.330 ms (0.00% GC)\n",
       "  median time:      10.456 ms (0.00% GC)\n",
       "  mean time:        12.099 ms (0.06% GC)\n",
       "  maximum time:     82.420 ms (0.00% GC)\n",
       "  --------------\n",
       "  samples:          412\n",
       "  evals/sample:     1\n",
       "  time tolerance:   5.00%\n",
       "  memory tolerance: 1.00%"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "storage = zeros(Int, nv(g))\n",
    "@benchmark local_clustering!($storage, $g) setup=(storage=zeros(Int, nv($g)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "BenchmarkTools.Trial: \n",
       "  memory estimate:  156.44 KiB\n",
       "  allocs estimate:  5\n",
       "  --------------\n",
       "  minimum time:     8.509 ms (0.00% GC)\n",
       "  median time:      9.774 ms (0.00% GC)\n",
       "  mean time:        10.338 ms (0.10% GC)\n",
       "  maximum time:     25.713 ms (0.00% GC)\n",
       "  --------------\n",
       "  samples:          482\n",
       "  evals/sample:     1\n",
       "  time tolerance:   5.00%\n",
       "  memory tolerance: 1.00%"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "@benchmark local_clustering!($storage, $g) setup=(storage=zeros(Bool, nv($g)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Bool Array Wins!\n",
    "\n",
    "It looks like the bool array is slightly faster even though we have to reset it. Lets use that."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 0.6.0-dev",
   "language": "julia",
   "name": "julia-0.6"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "0.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
